
# clear environment
rm(list = ls())

# make sure working directory is set to folder within which this script is saved
getwd()

# load data: survey responses (rt_data) and the Lucid report (lucid_info);
# both files are read relative to the working directory, with text columns
# kept as character rather than converted to factors
rt_data <- read.csv("Negativity+Bias_Summer+2018_October+27,+2021_13.38.csv", stringsAsFactors = FALSE)
lucid_info <- read.csv("Lucid Data Analysis Report.csv", stringsAsFactors = FALSE)

# remove test cases
# RIDs whose Lucid PID is "test" are lower-cased and matched against the
# survey's rid column. Rows are dropped with a logical mask rather than
# `-which(...)`: when nothing matches, `-which(...)` is `integer(0)`, which
# selects zero rows and would silently empty rt_data.
tests <- tolower(lucid_info$RID[lucid_info$PID == "test"])
is_test_case <- rt_data$rid %in% tests
rt_data <- rt_data[!is_test_case, ]

#### Data Checks ####

## Survey Time

# Return the `piece`-th token of every string after splitting on `sep`.
# Strings that are NA, or that have fewer than `piece` tokens, yield NA.
nth_token <- function(strings, sep, piece) {
  tokens <- strsplit(strings, sep)
  vapply(tokens, function(tok) tok[piece], character(1))
}

# Convert clock strings to minutes since midnight using the first two
# ":"-separated fields (hour and minute); any further field is ignored.
clock_to_minutes <- function(clock) {
  as.numeric(nth_token(clock, ":", 1)) * 60 + as.numeric(nth_token(clock, ":", 2))
}

start_stamp <- as.character(rt_data$StartDate)
end_stamp <- as.character(rt_data$EndDate)

# each stamp is split on the space: first token = date, second token = clock time
start_date <- nth_token(start_stamp, " ", 1)
end_date <- nth_token(end_stamp, " ", 1)

start_time <- clock_to_minutes(nth_token(start_stamp, " ", 2))
end_time <- clock_to_minutes(nth_token(end_stamp, " ", 2))

same_day <- ifelse(start_date == end_date, 1, 0)
which(same_day == 0) # respondents did not complete over same day, so times are wrong

# elapsed minutes; left NA when start and end fall on different dates because
# the within-day clock difference is meaningless there
rt_data$total_time <- ifelse(same_day == 1, end_time - start_time, NA)


## Validation and Attention Checks

# keep only responses started inside the collection window (8/22-8/24);
# the 20th and 21st were used to pilot test the survey
collection_days <- c("8/22/2018", "8/23/2018", "8/24/2018")
in_window <- start_date %in% collection_days
rt_data <- rt_data[in_window, ]
names(table(unique(substr(rt_data$StartDate, 1, 9))))

# who passed consent form (each task has its own consent item)
gave_consent <- rt_data$consent_flank == 1 | rt_data$consent_lex == 4 | rt_data$consent_loss == 1
sum(gave_consent, na.rm = TRUE)

# who passed bot check: bot_1 through bot_4 left blank, bot_5 and bot_6 equal to 1
bots_blank <- is.na(rt_data$bot_1) & is.na(rt_data$bot_2) & is.na(rt_data$bot_3) &
  is.na(rt_data$bot_4)
sum(bots_blank & rt_data$bot_5 == 1 & rt_data$bot_6 == 1, na.rm = TRUE)

# who passed Trump attention check
passed_trump <- rt_data$trump == 3
sum(passed_trump, na.rm = TRUE)

# subset data to only those not kicked out of survey
# (which() drops rows where the check is NA as well as rows that failed it)
rt_data <- rt_data[which(passed_trump), , drop = FALSE]

# remove respondents without a Lucid ID
rt_data <- rt_data[!is.na(rt_data$rid), , drop = FALSE]
sum(is.na(rt_data$rid))

# remove duplicate Lucid RIDs (the first occurrence of each RID is kept)
rt_data <- rt_data[!duplicated(rt_data$rid), , drop = FALSE]
sum(duplicated(rt_data$rid))
nrow(rt_data) # 3,356 remaining

# index variable to track R across conditions
rt_data$index <- 1:nrow(rt_data)


#### Coding Loss Aversion Parameter ####

# subset to loss aversion task
la_data <- subset(rt_data, as.character(rt_data$task) == "loss")

# calculate loss aversion: x_plus divided by the sign-flipped x_minus
loss_ratio <- as.numeric(la_data$x_plus) / (-1 * as.numeric(la_data$x_minus))

# remove estimates that are not finite. A zero x_minus gives -Inf, +Inf or NaN
# depending on x_plus; any of these left in place would turn the mean and sd
# used for standardizing (and hence the whole standardized column) into NaN.
loss_ratio[!is.finite(loss_ratio)] <- NA
la_data$kwloss <- loss_ratio

# standardize
la_data$kwloss_std <- (la_data$kwloss - mean(la_data$kwloss, na.rm = TRUE)) / sd(la_data$kwloss, na.rm = TRUE)

# parameter restricted to 1/3 to 3 for robustness check
# (values outside the interval become NA; values already NA stay NA)
la_data$kwloss_3 <- ifelse(loss_ratio < 1/3 | loss_ratio > 3, NA, loss_ratio)

# standardize restricted var
la_data$kwloss_3_std <- (la_data$kwloss_3 - mean(la_data$kwloss_3, na.rm = TRUE)) / sd(la_data$kwloss_3, na.rm = TRUE)

## export coded data
write.csv(la_data, "loss_task_data.csv")


#### Coding Lex Parameter ####

# subset data to lex task
lex_data <- subset(rt_data, as.character(rt_data$task) == "lex")
nrow(lex_data) # 1128 respondents

# Turn one comma-separated trial log into a numeric vector and drop its first
# `n_drop` entries (the logs start with -1 placeholder values).
lex_parse_log <- function(log_string, n_drop) {
  values <- as.numeric(unlist(strsplit(log_string, split = ",")))
  values[-seq_len(n_drop)]
}

# Summarize the latencies of the trials flagged by `in_condition`.
#   hit_rt    latencies of correctly answered trials
#   hit_mean  their mean
#   scored_rt hit_rt followed by one penalty latency per incorrect trial,
#             the penalty being hit_mean + 2 * sd(hit_rt)
# With a single correct trial sd() is NA, so any penalty (and every score
# built from it) is NA as well.
lex_condition_rts <- function(rt, correct, in_condition) {
  hit_rt <- rt[which(in_condition & correct == 1)]
  n_miss <- length(which(in_condition & correct == 0))
  hit_mean <- mean(hit_rt)
  penalty <- hit_mean + 2 * sd(hit_rt)
  list(hit_rt = hit_rt, hit_mean = hit_mean,
    scored_rt = c(hit_rt, rep(penalty, n_miss)))
}

n_lex <- nrow(lex_data)

# per-respondent results, aligned with the rows of lex_data (NA until filled in)
dscore_vec <- rep(NA_real_, n_lex) # dscores
dscore_correct_vec <- rep(NA_real_, n_lex) # dscores for only correct trials
lex_correct_bysubj <- vector("list", n_lex) # vectors of indicators for correct trials
lex_bad_rt_check <- rep(NA_integer_, n_lex) # number of trials under 0.2s or over 5s
lex_bad_acc_check <- rep(NA, n_lex) # TRUE if less than 80% of trials correct

# trial-level results, kept per respondent and pooled after the loop
lex_rt_bysubj <- vector("list", n_lex)
lex_acc_bysubj <- vector("list", n_lex)
lex_type_bysubj <- vector("list", n_lex)

# fill in quantities of interest
for (i in seq_len(n_lex)) {
  ## converting necessary parameters
  # drop the two leading -1's, add the final trial, convert to seconds
  rt_vec <- c(lex_parse_log(lex_data$rt[i], 2), lex_data$temp_rt[i]) / 1000

  # drop the two leading -1's and add the final trial
  correct_vec <- c(lex_parse_log(lex_data$correct[i], 2), lex_data$temp_correct[i])
  lex_correct_bysubj[[i]] <- correct_vec

  # the word order log has only one leading -1
  word_order_vec <- lex_parse_log(lex_data$word_order_num[i], 1)

  # -1 if neg word, 1 if pos word, 0 if neutral, -99 if non word
  lex_word_latency <- ifelse(word_order_vec <= 10, -1,
    ifelse(word_order_vec <= 20, 1,
      ifelse(word_order_vec <= 30, 0, -99)))

  ## data quality checks
  low_accuracy <- mean(correct_vec) < 0.80
  lex_bad_acc_check[i] <- low_accuracy
  if (low_accuracy) { # respondent answered less than 80% correct
    print(paste("Less than 80% Correct for Respondent", i))
    next # d-scores stay NA; no trials are pooled for this respondent
  }

  out_of_range <- rt_vec < 0.2 | rt_vec > 5
  bad_rt <- which(out_of_range)
  lex_bad_rt_check[i] <- sum(out_of_range)
  if (length(bad_rt) > 0) {
    rt_vec <- rt_vec[-bad_rt]
    correct_vec <- correct_vec[-bad_rt]
    word_order_vec <- word_order_vec[-bad_rt]
    lex_word_latency <- lex_word_latency[-bad_rt]
  }

  ## store trimmed trials for the pooled export
  lex_rt_bysubj[[i]] <- rt_vec
  lex_acc_bysubj[[i]] <- correct_vec
  lex_type_bysubj[[i]] <- lex_word_latency

  ## calculating d-score
  # latencies for pos/neg word conditions, split by in/correct responses
  pos <- lex_condition_rts(rt_vec, correct_vec, lex_word_latency == 1)
  if (length(pos$hit_rt) == 0) { # catch for situations where R gets all wrong
    print(paste("Zero Correct Positive Word Answers for Respondent", i))
    next
  }

  neg <- lex_condition_rts(rt_vec, correct_vec, lex_word_latency == -1)
  if (length(neg$hit_rt) == 0) { # catch for situations where R gets all wrong
    print(paste("Zero Correct Negative Word Answers for Respondent", i))
    next
  }

  # std dev of R's response latencies in positive and negative word conditions combined
  lex_posneg_sd <- sd(c(pos$scored_rt, neg$scored_rt))

  # d-score (incorrect trials enter with the penalty latency)
  dscore_vec[i] <- (mean(pos$scored_rt) - mean(neg$scored_rt)) / lex_posneg_sd

  # d-score correct-only
  lex_posneg_correct_sd <- sd(c(pos$hit_rt, neg$hit_rt))
  dscore_correct_vec[i] <- (pos$hit_mean - neg$hit_mean) / lex_posneg_correct_sd
}

# pool trial-level data across respondents who passed the accuracy check
lex_rt_check <- unlist(lex_rt_bysubj) # reaction times
lex_correct_check <- unlist(lex_acc_bysubj) # indicator for correct trials
lex_latency_check <- unlist(lex_type_bysubj) # type of word

# % of original sample without RT data (a -1 among the correct indicators)
no_rt_data <- vapply(lex_correct_bysubj, function(x) -1 %in% x, logical(1))
sum(no_rt_data) / nrow(lex_data) # 7%
sum(!no_rt_data) # 1,050 remaining

# summarize accuracy rates for respondents with RT data
lex_correct_bysubj <- lex_correct_bysubj[!no_rt_data]
summary(unlist(lapply(lex_correct_bysubj, mean))) # median acc = 95, mean = 92

# summarize % of respondents' trials that were below 200ms or above 5000ms
summary(lex_bad_rt_check) # median number of bad RTs = 0, mean = 0.07

# % of respondents with RT data who had less than 80% of trials correct
sum(lex_bad_acc_check[!no_rt_data] == 1) / sum(!no_rt_data) # 6%
sum(lex_bad_acc_check != 1) # 984 remaining

# calculate dscores for analysis and robustness check/correct only
lex_data$dscore <- dscore_vec
lex_data$dscore_correct <- dscore_correct_vec

# standardize
lex_data$dscore_std <- (lex_data$dscore - mean(lex_data$dscore, na.rm = TRUE)) / sd(lex_data$dscore, na.rm = TRUE)
lex_data$dscore_correct_std <- (lex_data$dscore_correct - mean(lex_data$dscore_correct, na.rm = TRUE)) / sd(lex_data$dscore_correct, na.rm = TRUE)

# total # of respondents with dscores available for analysis
sum(!is.na(lex_data$dscore)) # 980 respondents with d-scores

## export
lex_task_info <- data.frame(lex_rt_check, lex_correct_check, lex_latency_check)
write.csv(lex_task_info, 'lex_task_data.csv')

#### Coding Flanker Parameter ####

# subset to flanker data
flank_data <- subset(rt_data, as.character(rt_data$task) == 'flank')
nrow(flank_data) # 1101 respondents

# Turn one comma-separated trial log into a numeric vector and drop its first
# `n_drop` entries (the logs start with -1 placeholder values).
flk_parse_log <- function(log_string, n_drop) {
  values <- as.numeric(unlist(strsplit(log_string, split = ",")))
  values[-seq_len(n_drop)]
}

# Summarize the latencies of the trials flagged by `in_condition`.
#   hit_rt    latencies of correctly answered trials
#   hit_mean  their mean
#   scored_rt hit_rt followed by one penalty latency per incorrect trial,
#             the penalty being hit_mean + 2 * sd(hit_rt)
# With a single correct trial sd() is NA, so any penalty (and every score
# built from it) is NA as well.
flk_condition_rts <- function(rt, correct, in_condition) {
  hit_rt <- rt[which(in_condition & correct == 1)]
  n_miss <- length(which(in_condition & correct == 0))
  hit_mean <- mean(hit_rt)
  penalty <- hit_mean + 2 * sd(hit_rt)
  list(hit_rt = hit_rt, hit_mean = hit_mean,
    scored_rt = c(hit_rt, rep(penalty, n_miss)))
}

# image numbers belonging to each image type
pos_cong <- c(1,2,6,7,11,12)
pos_incong <- c(3:5,8:10,13:15)
neg_cong <- c(19,20,24,25,29,30)
neg_incong <- c(16:18,21:23,26:28)

# image_cong codes: 1 = pos-cong, -1 = pos-incong, 2 = neg-con, -2 = neg-incong
flk_codes <- c(pos_con = 1, pos_incon = -1, neg_con = 2, neg_incon = -2)

n_flank <- nrow(flank_data)

# per-respondent results, aligned with the rows of flank_data (NA until filled in)
dscore_vec <- rep(NA_real_, n_flank) # dscores
dscore_correct_vec <- rep(NA_real_, n_flank) # dscores for correct trials
flk_pos_only <- rep(NA_real_, n_flank) # positive flanker score
flk_neg_only <- rep(NA_real_, n_flank) # negative flanker score
flk_correct_bysubj <- vector("list", n_flank) # vectors of correct trials by respondent
flk_bad_rt_check <- rep(NA_integer_, n_flank) # number of trials under 0.2s or over 5s
flk_bad_acc_check <- rep(NA, n_flank) # TRUE if less than 80% of trials correct

# trial-level results, kept per respondent and pooled after the loop
flk_rt_bysubj <- vector("list", n_flank)
flk_acc_bysubj <- vector("list", n_flank)
flk_image_bysubj <- vector("list", n_flank)

for (i in seq_len(n_flank)) {
  ## converting necessary parameters
  # drop the two leading -1's, add the final trial, convert to seconds
  flank_rt_vec <- c(flk_parse_log(flank_data$rt_flank[i], 2),
    flank_data$temp_rt_flank[i]) / 1000

  # drop the two leading -1's and add the final trial
  flank_correct_vec <- c(flk_parse_log(flank_data$correct_flank[i], 2),
    flank_data$temp_correct_flank[i])
  flk_correct_bysubj[[i]] <- flank_correct_vec

  # the image order log has only one leading -1
  image_order_vec <- flk_parse_log(flank_data$image_order[i], 1)

  # image type of every trial; NA if the image number is in none of the sets
  image_cong <- ifelse(image_order_vec %in% pos_cong, 1,
    ifelse(image_order_vec %in% pos_incong, -1,
      ifelse(image_order_vec %in% neg_cong, 2,
        ifelse(image_order_vec %in% neg_incong, -2, NA))))

  ## data quality checks
  low_accuracy <- mean(flank_correct_vec) < 0.80
  flk_bad_acc_check[i] <- low_accuracy
  if (low_accuracy) { # respondent answered less than 80% correct
    print(paste("Less than 80% Correct for Respondent", i))
    next # all scores stay NA; no trials are pooled for this respondent
  }

  out_of_range <- flank_rt_vec < 0.2 | flank_rt_vec > 5
  bad_rt <- which(out_of_range)
  flk_bad_rt_check[i] <- sum(out_of_range)
  if (length(bad_rt) > 0) {
    flank_rt_vec <- flank_rt_vec[-bad_rt]
    flank_correct_vec <- flank_correct_vec[-bad_rt]
    image_cong <- image_cong[-bad_rt]
  }

  ## store trimmed trials for the pooled export
  flk_rt_bysubj[[i]] <- flank_rt_vec
  flk_acc_bysubj[[i]] <- flank_correct_vec
  flk_image_bysubj[[i]] <- image_cong

  ## calculating d-score
  # latencies for each image type, split by in/correct responses
  cond <- lapply(flk_codes, function(code) {
    flk_condition_rts(flank_rt_vec, flank_correct_vec, image_cong == code)
  })

  n_hits <- vapply(cond, function(x) length(x$hit_rt), integer(1))
  if (any(n_hits == 0)) {
    print(paste("Zero Correct in One of Four Conditions for Respondent", i))
    next
  }

  # std dev of R's response latencies in combined conditions
  flk_overall_sd <- sd(c(cond[["pos_con"]]$scored_rt, cond[["pos_incon"]]$scored_rt,
    cond[["neg_con"]]$scored_rt, cond[["neg_incon"]]$scored_rt))

  # mean response times (incorrect trials enter with the penalty latency)
  flk_pos_con_mean <- mean(cond[["pos_con"]]$scored_rt)
  flk_pos_incon_mean <- mean(cond[["pos_incon"]]$scored_rt)
  flk_neg_con_mean <- mean(cond[["neg_con"]]$scored_rt)
  flk_neg_incon_mean <- mean(cond[["neg_incon"]]$scored_rt)

  # McLean et al. measure
  flk_pos_only[i] <- flk_pos_incon_mean - flk_pos_con_mean
  flk_neg_only[i] <- flk_neg_incon_mean - flk_neg_con_mean

  # d-score
  dscore_vec[i] <- ((flk_pos_incon_mean - flk_pos_con_mean) -
      (flk_neg_incon_mean - flk_neg_con_mean)) / flk_overall_sd

  # d-score correct-only
  flk_overall_correct_sd <- sd(c(cond[["pos_con"]]$hit_rt, cond[["pos_incon"]]$hit_rt,
    cond[["neg_con"]]$hit_rt, cond[["neg_incon"]]$hit_rt))

  dscore_correct_vec[i] <- ((cond[["pos_incon"]]$hit_mean - cond[["pos_con"]]$hit_mean) -
      (cond[["neg_incon"]]$hit_mean - cond[["neg_con"]]$hit_mean)) / flk_overall_correct_sd
}

# pool trial-level data across respondents who passed the accuracy check
flank_rt_check <- unlist(flk_rt_bysubj) # reaction times
flank_correct_check <- unlist(flk_acc_bysubj) # indicator for correct trials
flank_image_check <- unlist(flk_image_bysubj) # which type of image on a given trial

# % of original sample without RT data (a -1 among the correct indicators)
flk_no_rt_data <- vapply(flk_correct_bysubj, function(x) -1 %in% x, logical(1))
sum(flk_no_rt_data) / nrow(flank_data) # 9%
sum(!flk_no_rt_data) # 1,000 remaining

# summarize accuracy rates for respondents with RT data
flk_correct_bysubj <- flk_correct_bysubj[!flk_no_rt_data]
summary(unlist(lapply(flk_correct_bysubj, mean))) # median acc = 97, mean = 92

# summarize % of respondents' trials that were below 200ms or above 5000ms
summary(flk_bad_rt_check) # median number of bad RTs = 0, mean = 0.12

# % of respondents with RT data who had less than 80% of trials correct
sum(flk_bad_acc_check[!flk_no_rt_data] == 1) / sum(!flk_no_rt_data) # 10%
sum(flk_bad_acc_check != 1) # 899 remaining

flank_data$flk_pos_only <- flk_pos_only
flank_data$flk_neg_only <- flk_neg_only
flank_data$dscore <- dscore_vec
flank_data$dscore_correct <- dscore_correct_vec

# standardize
flank_data$dscore_std <- (flank_data$dscore - mean(flank_data$dscore, na.rm = TRUE)) / sd(flank_data$dscore, na.rm = TRUE)
flank_data$dscore_correct_std <- (flank_data$dscore_correct - mean(flank_data$dscore_correct, na.rm = TRUE)) / sd(flank_data$dscore_correct, na.rm = TRUE)

# total # of respondents with dscores available for analysis
sum(!is.na(flank_data$dscore)) # 897 respondents with d-scores

## export
flank_task_info <- data.frame(flank_rt_check, flank_correct_check, flank_image_check)
write.csv(flank_task_info, 'flank_task_data.csv')

#### Coding Negativity Bias ####

## this code creates a single variable "neg_bias" that contains the respective measure for each respondent (e.g., flanker vs lex)

# task-specific data sets, keyed by the value of rt_data$task they were subset on
task_sets <- list(loss = la_data, lex = lex_data, flank = flank_data)
task_chr <- as.character(rt_data$task)

# Build one rt_data-length column from task-specific columns.
#   columns: named character vector, task name -> column to copy from that
#            task's data set (tasks not named contribute nothing).
# Each respondent's value is found by matching their row number in rt_data to
# the `index` column carried by the task data sets. The result starts as all
# NA, so respondents whose task is missing, unrecognized, or not listed in
# `columns` stay NA. (Assigning element by element into a column that does not
# exist yet would instead recycle the first respondent's value to every row.)
pull_task_scores <- function(columns) {
  scores <- rep(NA_real_, nrow(rt_data))
  for (task_name in names(columns)) {
    task_data <- task_sets[[task_name]]
    in_task <- which(task_chr == task_name)
    source_row <- match(in_task, task_data$index)
    scores[in_task] <- task_data[[columns[[task_name]]]][source_row]
  }
  scores
}

# unstandardized
rt_data$neg_bias <- pull_task_scores(c(loss = "kwloss", lex = "dscore", flank = "dscore"))

# standardized
rt_data$neg_bias_std <- pull_task_scores(c(loss = "kwloss_std", lex = "dscore_std", flank = "dscore_std"))

# only correct trials unstandardized
rt_data$dscore_correct <- pull_task_scores(c(lex = "dscore_correct", flank = "dscore_correct"))

# only correct trials standardized
rt_data$dscore_correct_std <- pull_task_scores(c(lex = "dscore_correct_std", flank = "dscore_correct_std"))

# loss aversion restricted to 1/3 to 3 for robustness checks
rt_data$kwloss_3_std <- pull_task_scores(c(loss = "kwloss_3_std"))

# McLean et al. measures for flanker robustness checks
rt_data$flk_pos_only <- pull_task_scores(c(flank = "flk_pos_only"))
rt_data$flk_neg_only <- pull_task_scores(c(flank = "flk_neg_only"))

#### Coding Dependent Variables ####

# all coded such that higher values closer to conservatism expectation

#### Openness ####

## flip questions that were reverse coded
rt_data$ipip1 <- 6 - rt_data$ipip1
rt_data$ipip5 <- 6 - rt_data$ipip5

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
open_items <- cbind(rt_data$ipip1, rt_data$ipip2, rt_data$ipip3, rt_data$ipip4,
  rt_data$ipip5)
open_means <- apply(open_items, 1, mean, na.rm = TRUE)

# shift and divide so a mean of 1 maps to 0 and a mean of 5 maps to 1
rt_data$open <- (open_means - 1) / 4

#### NFC ####

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
nfc_items <- cbind(rt_data$nfc1, rt_data$nfc2, rt_data$nfc3, rt_data$nfc4,
  rt_data$nfc5, rt_data$nfc6, rt_data$nfc7, rt_data$nfc8,
  rt_data$nfc9, rt_data$nfc10)
nfc_means <- apply(nfc_items, 1, mean, na.rm = TRUE)

# shift and divide so a mean of 1 maps to 0 and a mean of 6 maps to 1
rt_data$nfc <- (nfc_means - 1) / 5


#### Conservation ####

## fix error where schw2-5 6 coded as 8
rt_data$schw2 <- ifelse(rt_data$schw2 == 8, 6, rt_data$schw2)
rt_data$schw3 <- ifelse(rt_data$schw3 == 8, 6, rt_data$schw3)
rt_data$schw4 <- ifelse(rt_data$schw4 == 8, 6, rt_data$schw4)
rt_data$schw5 <- ifelse(rt_data$schw5 == 8, 6, rt_data$schw5)

## flip questions that were reverse coded
rt_data$schw2 <- 7 - rt_data$schw2
rt_data$schw3 <- 7 - rt_data$schw3
rt_data$schw5 <- 7 - rt_data$schw5

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
conserve_items <- cbind(rt_data$schw1, rt_data$schw2, rt_data$schw3, rt_data$schw4,
  rt_data$schw5)
conserve_means <- apply(conserve_items, 1, mean, na.rm = TRUE)

# shift and divide so a mean of 1 maps to 0 and a mean of 6 maps to 1
rt_data$conserve <- (conserve_means - 1) / 5

#### Authoritarianism ####

## flip reverse coded items
rt_data$auth3 <- 3 - rt_data$auth3
rt_data$auth5 <- 3 - rt_data$auth5

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
auth_items <- cbind(rt_data$auth1, rt_data$auth2, rt_data$auth3, rt_data$auth4,
  rt_data$auth5)
auth_means <- apply(auth_items, 1, mean, na.rm = TRUE)

# shift so a mean of 1 maps to 0 and a mean of 2 maps to 1
rt_data$auth <- auth_means - 1

#### Moral Traditionalism ####

## flip reverse coded items
rt_data$trad2 <- 8 - rt_data$trad2
rt_data$trad3 <- 8 - rt_data$trad3

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
trad_items <- cbind(rt_data$trad1, rt_data$trad2, rt_data$trad3, rt_data$trad4)
trad_means <- apply(trad_items, 1, mean, na.rm = TRUE)

# shift and divide so a mean of 1 maps to 0 and a mean of 7 maps to 1
rt_data$trad <- (trad_means - 1) / 6

#### Limited Gov ####

## flip reverse coded items
rt_data$lim2 <- 3 - rt_data$lim2
rt_data$lim3 <- 3 - rt_data$lim3

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
limgov_items <- cbind(rt_data$lim1, rt_data$lim2, rt_data$lim3)
limgov_means <- apply(limgov_items, 1, mean, na.rm = TRUE)

# shift so a mean of 1 maps to 0 and a mean of 2 maps to 1
rt_data$limgov <- limgov_means - 1

#### Social Policy ####

## flip reverse coded items
# gay: 1 stays 1, 2 becomes 3, any other answer becomes 2 (NA stays NA)
gm_recode <- ifelse(rt_data$gay == 1, 1, ifelse(rt_data$gay == 2, 3, 2))
rt_data$gay <- gm_recode
rt_data$abort <- 5 - rt_data$abort

## place all on same scale
rt_data$gay <- (rt_data$gay - 1) / 2
rt_data$affirm <- (rt_data$affirm - 1) / 5
rt_data$imm <- (rt_data$imm - 1) / 3
rt_data$abort <- (rt_data$abort - 1) / 3

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
social_items <- cbind(rt_data$gay, rt_data$affirm, rt_data$imm, rt_data$abort)
social_means <- apply(social_items, 1, mean, na.rm = TRUE)

rt_data$social <- social_means

#### Economic Policy ####

## flip reverse coded items
# NOTE(review): ssec is flipped with 7 - x (which keeps a 1-6 item on 1-6) but
# rescaled below with / 6 (which maps a 1-7 item onto 0-1). One of the two
# constants looks inconsistent with the item's response scale - confirm against
# the questionnaire before changing; the original arithmetic is kept here.
rt_data$ssec <- 7 - rt_data$ssec

## place all on same scale
rt_data$insure <- (rt_data$insure - 1) / 6
rt_data$ssec <- (rt_data$ssec - 1) / 6
rt_data$wage <- (rt_data$wage - 1) / 5
rt_data$tax <- (rt_data$tax - 1) / 3

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered none)
economic_items <- cbind(rt_data$insure, rt_data$ssec, rt_data$wage, rt_data$tax)
economic_means <- apply(economic_items, 1, mean, na.rm = TRUE)

rt_data$economic <- economic_means

#### Political Identity ####

## recode pid7
# take dstr when it is present, otherwise rstr when it is present, otherwise lean
rt_data$pid7 <- ifelse(!is.na(rt_data$dstr), rt_data$dstr,
  ifelse(!is.na(rt_data$rstr), rt_data$rstr, rt_data$lean))
table(rt_data$pid7[!is.na(rt_data$neg_bias)]) / sum(table(rt_data$pid7[!is.na(rt_data$neg_bias)])) # Dems 45%, Reps 37, Ind 17

## ideo
table(rt_data$ideo[!is.na(rt_data$neg_bias)]) / sum(table(rt_data$ideo[!is.na(rt_data$neg_bias)])) # Libs 32%, Cons 33, Mods 34

## average and normalize
# one row per respondent, one column per item; each respondent's mean is taken
# over the items they answered (NaN if they answered neither)
pol_id_items <- cbind(rt_data$pid7, rt_data$ideo)
pol_id_means <- apply(pol_id_items, 1, mean, na.rm = TRUE)

# shift and divide so a mean of 1 maps to 0 and a mean of 7 maps to 1
rt_data$pol_id <- (pol_id_means - 1) / 6

#### Code Remaining Individual Issues ####

# Military Spending: reverse the item (7 - answer), then shift and divide
# so that 1 maps to 0 and 6 maps to 1
rt_data$milit <- ((7 - rt_data$milit) - 1) / 5

# Imports: same shift and divide, no reversal
rt_data$import <- (rt_data$import - 1) / 5

#### Coding Controls ####

# respondents who have a score for negativity bias; every descriptive
# statistic printed in this section is computed on these respondents only
has_neg_bias <- !is.na(rt_data$neg_bias)

## overall sample size (# of respondents who have scores for negativity bias)
sum(has_neg_bias) # 2,431

## Age
rt_data$age <- 2018 - rt_data$born
mean(rt_data$age[has_neg_bias], na.rm = TRUE) # 50.61
# rescale so the youngest respondent is 0 and the oldest is 1
age_range <- range(rt_data$age, na.rm = TRUE)
rt_data$age <- (rt_data$age - age_range[1]) / (age_range[2] - age_range[1])

## Gender
sum(rt_data$gender == 3, na.rm = TRUE) # how many select other
rt_data$female <- as.numeric(rt_data$gender == 2) # 1 if gender is 2, 0 otherwise, NA if missing
prop.table(table(rt_data$female[has_neg_bias])) # 57%

## Black identification
# 1 only when race_2 is present and non-zero
rt_data$black <- as.numeric(!is.na(rt_data$race_2) & rt_data$race_2 != 0)
prop.table(table(rt_data$black[has_neg_bias])) # 12%

## Hispanic identification
# 1 only when race_3 is present and non-zero
rt_data$hisp <- as.numeric(!is.na(rt_data$race_3) & rt_data$race_3 != 0)
prop.table(table(rt_data$hisp[has_neg_bias])) # 6%

## Education
prop.table(table(rt_data$educ[has_neg_bias])) # HS or less 25%, Some 38%, BA 25%, Post 11%
# rescale observed range to 0-1
educ_range <- range(rt_data$educ, na.rm = TRUE)
rt_data$educ <- (rt_data$educ - educ_range[1]) / (educ_range[2] - educ_range[1])

## Income
prop.table(table(rt_data$income[has_neg_bias])) # <10 7%, 10-50 47%, 50-100 32%, 100-150 9%, >150 4%
# rescale observed range to 0-1
income_range <- range(rt_data$income, na.rm = TRUE)
rt_data$income <- (rt_data$income - income_range[1]) / (income_range[2] - income_range[1])

## Unemployed
rt_data$unemp <- as.numeric(rt_data$employ == 3) # 1 if employ is 3, 0 otherwise, NA if missing
prop.table(table(rt_data$unemp[has_neg_bias])) # 11%

#### Coding Political Engagement ####

## flips, recodes, normalizing
rt_data$attention <- (rt_data$attention - 1) / 4

rt_data$news <- rt_data$news / 7

# knowledge items: 1 if the answer equals the keyed response, 0 otherwise,
# NA if the item was not answered
correct1 <- ifelse(rt_data$know1 == 4, 1, 0)
correct2 <- ifelse(rt_data$know2 == 3, 1, 0)
correct3 <- ifelse(rt_data$know3 == 3, 1, 0)
correct4 <- ifelse(rt_data$know4 == 2, 1, 0)
correct5 <- ifelse(rt_data$know5 == 2, 1, 0)

# number of keyed responses per respondent; NA as soon as one item is NA
total_corr <- correct1 + correct2 + correct3 + correct4 + correct5
rt_data$know <- total_corr / 5

## average
# one row per respondent, one column per component; each respondent's mean is
# taken over the components that are not NA (NaN if all three are NA)
engage_items <- cbind(rt_data$attention, rt_data$news, rt_data$know)
engage_means <- apply(engage_items, 1, mean, na.rm = TRUE)

rt_data$engage <- engage_means

#### Export Final Dataset ####

# write to csv
# Saves rt_data, including every column added above, to the working directory.
# write.csv() is called with its defaults, so the data frame's row names are
# written as an unnamed first column.
write.csv(rt_data, 'neg_bias_analysis_file_18.csv')

